In [ ]:
import pandas as pd
import plotly.express as px
from IPython.display import display
from typing import List
# Read the Mapbox token through a context manager so the file handle is
# closed deterministically instead of being left for the garbage collector.
with open(".mapbox_token") as token_file:
    px.set_mapbox_access_token(token_file.read())
# Cap how many rows a displayed DataFrame shows.
pd.options.display.max_rows = 10
from helpers import  _load_synthetic_clinics, _load_at_risk_zip3, _k_closest_clinics, draw_status_treemap, _load_zip3_census, draw_at_risk_vs_clinic_locations_map

# Call the treemap helper imported from helpers with its defaults
# (presumably renders the status treemap inline -- see helpers.py to confirm).
draw_status_treemap()

Available at: https://github.com/parquar/us-census-zips-geo/tree/master/post_roe

In [ ]:
def build_distance_matrix(origin_zip3: List[str], clinics: pd.DataFrame, k:int =10) -> pd.DataFrame:
    """
        Iterates through each zip3 <> clinic permutation,
        and selects the min(distance) k clinic locations.

        Calls ``_k_closest_clinics`` once per origin and stacks the per-origin
        frames into one DataFrame with a fresh 0..n-1 index.

        Args:
            origin_zip3: origin zip3 codes; any iterable works (list, Series, ...).
            clinics: candidate clinic locations, forwarded unchanged to the helper.
            k: number of closest clinics to keep per origin.

        Returns:
            The concatenated per-origin results, or an empty DataFrame
            (no columns) when ``origin_zip3`` is empty.
    """
    # NOTE: this takes ~2 min at 500 clinics / could consider optimizing
    # Materialise once so Series/generators are handled and emptiness can be
    # tested (truth-testing a Series directly is ambiguous and raises).
    origins = list(origin_zip3)
    if not origins:
        # pd.concat raises "No objects to concatenate" on an empty list.
        return pd.DataFrame()
    per_origin = [_k_closest_clinics(zip3, clinics, k=k) for zip3 in origins]
    return pd.concat(per_origin, ignore_index=True)

# Tunable inputs for this run, named so the values are not buried in call sites.
ADI_FLOOR = 20  # I could use alternate perspective on where to tune to
N_SYNTHETIC_CLINICS = 500  # see helpers.py for "Why Synthetic"
K_CLOSEST = 10  # clinics kept per origin zip3

at_risk = _load_at_risk_zip3(adi_floor=ADI_FLOOR)
clinics = _load_synthetic_clinics(n=N_SYNTHETIC_CLINICS)

at_risk_distances = build_distance_matrix(at_risk['_zip3'], clinics, k=K_CLOSEST)
at_risk_distances
Finding distances to 10 closest clinics for each of 572 zip3 origin locations with ADI above 20
Out[ ]:
_state _clinic_zip5 _lat _lng _type _origin_zip3 _distance
0 IL 62963 37.094559 -89.169921 synthetic_clinic 301** 290
1 IL 62906 37.462889 -89.177551 synthetic_clinic 301** 307
2 IL 62820 38.248629 -88.235358 synthetic_clinic 301** 316
3 IL 62862 38.219115 -88.299337 synthetic_clinic 301** 317
4 IL 62932 37.892867 -89.213656 synthetic_clinic 301** 329
... ... ... ... ... ... ... ...
5715 CO 80017 39.699438 -104.775296 synthetic_clinic 828** 362
5716 CO 81652 39.466243 -107.673259 synthetic_clinic 828** 364
5717 CO 80127 39.530726 -105.164344 synthetic_clinic 828** 368
5718 CO 80108 39.444376 -104.853179 synthetic_clinic 828** 378
5719 CO 81251 39.142548 -106.437579 synthetic_clinic 828** 385

5720 rows × 7 columns

In [ ]:
def _get_at_risk_stats(at_risk_distances: pd.DataFrame) -> pd.DataFrame:
    """
        Summarise clinic distances per origin zip3 and attach census attributes.

        Args:
            at_risk_distances: one row per (origin zip3, clinic) pair, with at
                least the ``_origin_zip3`` and ``_distance`` columns.

        Returns:
            Per-origin count / mean / min / max of ``_distance``, inner-joined
            on ``_zip3`` to the frame returned by ``_load_zip3_census()``.
    """
    distance_summary = (
        at_risk_distances
        .groupby(['_origin_zip3'])
        .agg(
            k=("_distance", "count"),
            distance_mean=("_distance", "mean"),  # mean distance to closest k clinics
            distance_min=("_distance", "min"),
            distance_max=("_distance", "max"),
        )
        .reset_index()
        .rename(columns={"_origin_zip3": "_zip3"})
    )
    # Join on an explicit key: a bare merge() keys on every column name the two
    # frames share, so an unrelated overlap would silently change the join.
    # NOTE(review): something's off with _state -- the join can yield several
    # rows for one zip3 if the census frame lists it under more than one state;
    # confirm upstream.
    return distance_summary.merge(_load_zip3_census(), on="_zip3", how="inner")
# Build the per-origin stats and drop NY rows in one expression.
# TODO: something is messed up with NY and PA upstream; excluding NY is a stopgap.
at_risk_stats = _get_at_risk_stats(at_risk_distances).loc[
    lambda stats: stats['_state'].ne("NY")
]
at_risk_stats
Out[ ]:
_zip3 k distance_mean distance_min distance_max _state _lat _lng _census_total _adi_mean
1 127** 10 42.9 26 54 PA 41.485554 -74.892326 1207 47.000000
3 147** 10 54.5 28 74 PA 42.046328 -79.669830 2540 75.000000
4 150** 10 155.2 100 187 PA 40.457488 -80.065227 448172 65.563380
5 151** 10 150.8 92 180 PA 40.419911 -79.925283 365418 70.925926
6 152** 10 152.5 96 183 PA 40.445441 -79.979009 687276 63.170732
... ... ... ... ... ... ... ... ... ... ...
638 979** 10 207.2 0 253 OR 43.801827 -117.543657 31751 63.200000
639 990** 10 116.0 19 197 ID 47.635828 -117.094140 6264 41.000000
640 990** 10 116.0 19 197 WA 47.619646 -117.441078 142008 50.387097
641 991** 10 105.8 14 184 ID 47.097949 -117.081329 321 62.000000
642 991** 10 105.8 14 184 WA 47.840974 -118.012785 115238 62.625000

641 rows × 10 columns

In [ ]:
# One box per `_state`, showing the spread of each origin's `distance_mean`.
px.box(
    data_frame=at_risk_stats,
    y="distance_mean",
    x="_state",
    title="Range of Mean Distance to 10 Closest Clinics | by State",
).show(renderer="notebook")
In [ ]:
# Mean of `_adi_mean` across the retained origins, printed above the map.
print(at_risk_stats['_adi_mean'].mean())

# Bubble map of origins: sized by `_census_total`, colored by `distance_mean`.
px.scatter_mapbox(
    data_frame=at_risk_stats,
    lat="_lat",
    lon="_lng",
    color='distance_mean',
    size='_census_total',
    size_max=15,
    hover_data=['_adi_mean'],
    zoom=3,
    height=800,
    title=f"Unprotected Origin Locations | n={len(at_risk_stats)} Zip3 | Color=avg distance to 10 closest clinics | mean={int(at_risk_stats['distance_mean'].mean())} miles",
).show(renderer='notebook')
67.58602319455363
In [ ]:
# Distribution of each origin's mean distance to its closest clinics.
# The median is formatted to one decimal so the title never shows raw float
# noise (e.g. 152.35000000000002 when the median averages two values).
px.histogram(
    at_risk_stats, x='distance_mean',
    title=f"Median miles protection seeker would need to travel: {at_risk_stats['distance_mean'].median():.1f} miles | from n={len(at_risk_stats)} origins"
).show(renderer="notebook")

NB: a good goal here is to find a way to strategically deploy resources to minimize harm¶

In [ ]:
# def draw_at_risk_vs_clinic_locations_map() -> None:
    
#     """
#         Illustrative of Areas with Protections, vs those without
#         Why synthetic clinics
#     """
#     at_risk = _load_at_risk_zip3()
#     clinics = _load_synthetic_clinics(n=500)
#     locations = pd.concat([clinics,at_risk]).fillna(at_risk['_census_total'].mean())
#     px.scatter_mapbox(
#         locations, lat="_lat", lon="_lng", size_max=15, mapbox_style="open-street-map",
#         height=700, zoom=3, color='_state', hover_data=['_state','_zip3'],
#         size='_census_total',
#         title=f"At Risk Areas (3-Digit Zipcode) vs (Synthetic) Clinic Locations | Scaled by Population"
#     ).show(renderer='notebook')
#     return locations

# draw_at_risk_vs_clinic_locations_map()

_archive¶

Everything below is archived or work in progress (WIP).

Simulation: Distance Traveled¶

The goal is to learn what range of experiences someone seeking care will face when starting from a given origin point, in PA for example.

In [ ]:
def draw_closest_clinics_by_state(states: list):
    """
        Map the at-risk zip3 rows of the given states alongside the synthetic clinics.

        Args:
            states: values matched against the at-risk frame's ``_state`` column.

        Shows the figure with the notebook renderer; nothing is returned.
    """
    at_risk_all = _load_at_risk_zip3()
    in_requested_states = at_risk_all['_state'].isin(states)
    # Stack the filtered origins on top of the synthetic clinics frame;
    # the map colors points by `_type`.
    map_points = pd.concat([at_risk_all[in_requested_states], _load_synthetic_clinics()])

    figure = px.scatter_mapbox(
        map_points,
        lat="_lat",
        lon="_lng",
        color='_type',
        size_max=15,
        zoom=3,
        height=600,
        title="At Risk Areas vs (Simulated) 10 Closest Clinic Locations",
    )
    figure.show(renderer='notebook')

draw_closest_clinics_by_state(states=["PA", "LA"])
Finding distances to 10 closest clinics for each of 498 zip3 origin locations with ADI above 50